@article{Treiber2000,
    author  = {Treiber, Martin and Hennecke, Ansgar and Helbing, Dirk},
    journal = {Physical Review E - Statistical Physics, Plasmas, Fluids, and Related Interdisciplinary Topics},
    volume  = {62},
    number  = {2},
    pages   = {1805--1824},
    doi     = {10.1103/PhysRevE.62.1805},
    title   = {Congested Traffic States in Empirical Observations and Microscopic Simulations},
    year    = {2000}
}

@article{Kesting2007,
    abstract = {A general model (minimizing overall braking induced by lane change, MOBIL) is proposed to derive lane-changing rules for discretionary and mandatory lane changes for a wide class of car-following models. Both the utility of a given lane and the risk associated with lane changes are determined in terms of longitudinal accelerations calculated with microscopic traffic models. This determination allows for the formulation of compact and general safety and incentive criteria for both symmetric and asymmetric passing rules. Moreover, anticipative elements and the crucial influence of velocity differences of these car-following models are automatically transferred to the lane-changing rules. Although the safety criterion prevents critical lane changes and collisions, the incentive criterion takes into account the advantages and disadvantages of other drivers associated with a lane change via the "politeness factor." The parameter allows one to vary the motivation for lane changing from purely egoistic to more cooperative driving behavior. This novel feature allows one first to prevent lane changes for a marginal advantage if they obstruct other drivers and second to let an aggressive driver induce the lane change of a slower driver ahead in order to no longer be obstructed. This phenomenon is common for asymmetric passing rules with a dedicated lane for passing. The model is applied to traffic simulations of cars and trucks with the Intelligent Driver Model as the underlying car-following model. An open system with an on-ramp is studied, and the resulting lane-changing rate is investigated as a function of the spatial coordinate as well as a function of traffic density.},
    author   = {Kesting, Arne and Treiber, Martin and Helbing, Dirk},
    doi      = {10.3141/1999-10},
    isbn     = {9780309104258},
    issn     = {03611981},
    journal  = {Transportation Research Record},
    volume   = {1999},
    pages    = {86--94},
    title    = {General Lane-Changing Model {MOBIL} for Car-Following Models},
    year     = {2007}
}

@inproceedings{Polack2017,
    author    = {Polack, Philip and Altch{\'{e}}, Florent and D'Andr{\'{e}}a-Novel, Brigitte},
    booktitle = {IEEE Intelligent Vehicles Symposium (IV)},
    doi       = {10.1109/IVS.2017.7995816},
    isbn      = {9781509048038},
    pages     = {812--818},
    title     = {The Kinematic Bicycle Model: A Consistent Model for Planning Feasible Trajectories for Autonomous Vehicles?},
    year      = {2017}
}

@inproceedings{Hren2008,
    author    = {Hren, Jean-Fran{\c{c}}ois and Munos, R{\'{e}}mi},
    booktitle = {Recent Advances in Reinforcement Learning (EWRL 2008)},
    series    = {Lecture Notes in Computer Science},
    volume    = {5323},
    pages     = {151--164},
    title     = {Optimistic Planning of Deterministic Systems},
    year      = {2008}
}

@inproceedings{Andrychowicz2017,
    abstract      = {Dealing with sparse rewards is one of the biggest challenges in Reinforcement Learning (RL). We present a novel technique called Hindsight Experience Replay which allows sample-efficient learning from rewards which are sparse and binary and therefore avoid the need for complicated reward engineering. It can be combined with an arbitrary off-policy RL algorithm and may be seen as a form of implicit curriculum. We demonstrate our approach on the task of manipulating objects with a robotic arm. In particular, we run experiments on three different tasks: pushing, sliding, and pick-and-place, in each case using only binary rewards indicating whether or not the task is completed. Our ablation studies show that Hindsight Experience Replay is a crucial ingredient which makes training possible in these challenging environments. We show that our policies trained on a physics simulation can be deployed on a physical robot and successfully complete the task. The video presenting our experiments is available at https://goo.gl/SMrQnI.},
    archivePrefix = {arXiv},
    author        = {Andrychowicz, Marcin and Wolski, Filip and Ray, Alex and Schneider, Jonas and Fong, Rachel and Welinder, Peter and McGrew, Bob and Tobin, Josh and Abbeel, Pieter and Zaremba, Wojciech},
    booktitle     = {Advances in Neural Information Processing Systems},
    eprint        = {1707.01495},
    issn          = {10495258},
    title         = {Hindsight Experience Replay},
    year          = {2017}
}

@article{Mnih2015,
    author  = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Rusu, Andrei A. and Veness, Joel and Bellemare, Marc G. and Graves, Alex and Riedmiller, Martin and Fidjeland, Andreas K. and Ostrovski, Georg and Petersen, Stig and Beattie, Charles and Sadik, Amir and Antonoglou, Ioannis and King, Helen and Kumaran, Dharshan and Wierstra, Daan and Legg, Shane and Hassabis, Demis},
    journal = {Nature},
    volume  = {518},
    number  = {7540},
    pages   = {529--533},
    doi     = {10.1038/nature14236},
    title   = {Human-Level Control through Deep Reinforcement Learning},
    year    = {2015}
}

@inproceedings{Leurent2019social,
    author        = {Leurent, Edouard and Mercat, Jean},
    title         = {Social Attention for Autonomous Decision-Making in Dense Traffic},
    booktitle     = {Machine Learning for Autonomous Driving Workshop at the Thirty-Third Conference on Neural Information Processing Systems (NeurIPS 2019)},
    month         = dec,
    year          = {2019},
    eprint        = {1911.12250},
    archivePrefix = {arXiv},
    primaryClass  = {cs.SY}
}

@inproceedings{Qi2017pointnet,
    author        = {Qi, Charles R. and Su, Hao and Mo, Kaichun and Guibas, Leonidas J.},
    title         = {{PointNet}: Deep Learning on Point Sets for {3D} Classification and Segmentation},
    booktitle     = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
    pages         = {652--660},
    year          = {2017},
    eprint        = {1612.00593},
    archivePrefix = {arXiv},
    primaryClass  = {cs.CV}
}